# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod
import sympy as sm
import numpy as np
from hysop.tools.htypes import check_instance
from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator
from hysop.backend.device.codegen.base.function_codegen import (
OpenClFunctionCodeGenerator,
)
from hysop.backend.device.codegen.base.variables import (
CodegenVariable,
CodegenVectorClBuiltin,
)
from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict
from hysop.backend.device.codegen.base.statistics import WorkStatistics
from hysop.backend.device.opencl.opencl_types import OpenClTypeGen, basetype
class OpenClComplexOperator(OpenClFunctionCodeGenerator, metaclass=ABCMeta):
    """Abstract base for inline OpenCL function generators working on complex values.

    Concrete subclasses must expose a ``basename`` class attribute (it is read
    by ``__init__``) and implement ``gencode``. The three hooks
    ``generate_arguments``, ``generate_requirements`` and
    ``determine_output_ctype`` can be overridden to customise the function
    arguments, its requirements and its return type.
    """

    def __init__(self, typegen, ftype, vectorization, output=None, known_args=None):
        """Assemble the function description and immediately generate its code.

        Parameters
        ----------
        typegen:
            Type generator; used to build the vector type names and forwarded
            to the parent constructor.
        ftype:
            Scalar type; normalised through ``basetype`` before use.
        vectorization:
            Must be one of 1, 2, 4 or 8.
        output:
            Accepted in the signature, but the value passed in is not used:
            the return type always comes from ``determine_output_ctype``.
        known_args:
            Forwarded unchanged to the parent constructor.
        """
        assert vectorization in (1, 2, 4, 8)

        ftype = basetype(ftype)
        # vtype carries twice as many components as rtype.
        vtype = typegen.vtype(ftype, 2 * vectorization)
        rtype = typegen.vtype(ftype, 1 * vectorization)

        # Every hook receives the same leading positional arguments.
        hook_args = (ftype, vtype, rtype, vectorization, typegen)
        args = self.generate_arguments(*hook_args)
        reqs = self.generate_requirements(*hook_args, args)
        return_ctype = self.determine_output_ctype(*hook_args, args, reqs)

        super().__init__(
            basename=self.basename,
            output=return_ctype,
            typegen=typegen,
            inline=True,
            args=args,
            known_args=known_args,
        )
        self.update_requirements(reqs)

        # gencode() implementations read these back through get_attrs(),
        # so they have to be set before it is invoked.
        self.ftype = ftype
        self.vtype = vtype
        self.rtype = rtype
        self.vectorization = vectorization

        self.gencode()

    def generate_arguments(self, ftype, vtype, rtype, vectorization, typegen):
        """Return the function arguments; the base class provides an empty ``ArgDict``."""
        return ArgDict()

    def generate_requirements(self, ftype, vtype, rtype, vectorization, typegen, args):
        """Return the function requirements; the base class provides an empty ``WriteOnceDict``."""
        return WriteOnceDict()

    def determine_output_ctype(
        self, ftype, vtype, rtype, vectorization, typegen, args, reqs
    ):
        """Return the C return type of the function; ``vtype`` unless overridden."""
        return vtype

    @abstractmethod
    def gencode(self):
        """Emit the function body. Must be implemented by concrete subclasses."""
        pass
class OpenClComplexUnaryOperator(OpenClComplexOperator):
    """Complex operator that takes a single vector operand named ``a``."""

    def generate_arguments(self, ftype, vtype, rtype, vectorization, typegen):
        """Return the inherited arguments extended with the operand ``a``.

        ``a`` is declared as a ``CodegenVectorClBuiltin`` of ``ftype`` with
        ``2 * vectorization`` components.
        """
        arguments = super().generate_arguments(
            ftype=ftype,
            vtype=vtype,
            rtype=rtype,
            vectorization=vectorization,
            typegen=typegen,
        )
        operand = CodegenVectorClBuiltin(
            "a", ftype, 2 * vectorization, typegen, add_impl_const=True
        )
        arguments["a"] = operand
        return arguments

    def get_attrs(self):
        """Return ``(self, typegen, vectorization, ftype, vtype, rtype, a)``.

        Convenience tuple meant to be unpacked at the top of ``gencode``.
        """
        operand = self.args["a"]
        return (
            self,
            self.typegen,
            self.vectorization,
            self.ftype,
            self.vtype,
            self.rtype,
            operand,
        )
class OpenClComplexBinaryOperator(OpenClComplexOperator):
    """Complex operator that takes two vector operands named ``lhs`` and ``rhs``."""

    def generate_arguments(self, ftype, vtype, rtype, vectorization, typegen):
        """Return the inherited arguments extended with ``lhs`` then ``rhs``.

        Both operands are ``CodegenVectorClBuiltin`` instances of ``ftype``
        with ``2 * vectorization`` components.
        """
        arguments = super().generate_arguments(
            ftype=ftype,
            vtype=vtype,
            rtype=rtype,
            vectorization=vectorization,
            typegen=typegen,
        )
        # Insertion order is kept as lhs, rhs.
        for operand_name in ("lhs", "rhs"):
            arguments[operand_name] = CodegenVectorClBuiltin(
                operand_name, ftype, 2 * vectorization, typegen, add_impl_const=True
            )
        return arguments

    def get_attrs(self):
        """Return ``(self, typegen, vectorization, ftype, vtype, rtype, lhs, rhs)``.

        Convenience tuple meant to be unpacked at the top of ``gencode``.
        """
        left = self.args["lhs"]
        right = self.args["rhs"]
        return (
            self,
            self.typegen,
            self.vectorization,
            self.ftype,
            self.vtype,
            self.rtype,
            left,
            right,
        )
class OpenClComplexMul(OpenClComplexBinaryOperator):
    """Generator for ``cmul``: component-wise product of packed complex values.

    Components ``2*i`` and ``2*i + 1`` of each operand are treated as the real
    and imaginary part of the i-th complex number.
    """

    basename = "cmul"

    def gencode(self):
        """Emit a single ``return`` statement building the product vector.

        For every complex pair the emitted expressions are
        ``re_l*re_r - im_l*im_r`` (real part) followed by
        ``re_l*im_r + im_l*re_r`` (imaginary part).
        """
        s, _tg, vec, _ftype, vtype, _rtype, lhs, rhs = self.get_attrs()
        with s._function_(), s._align_() as al:
            components = []
            for k in range(vec):
                re_idx = 2 * k
                im_idx = re_idx + 1
                components.append(
                    f"{lhs[re_idx]}*{rhs[re_idx]}-{lhs[im_idx]}*{rhs[im_idx]}"
                )
                components.append(
                    f"{lhs[re_idx]}*{rhs[im_idx]}+{lhs[im_idx]}*{rhs[re_idx]}"
                )
            # Each component sits on its own line, prefixed with "$"
            # (presumably the anchor consumed by the _align_ context - confirm).
            initializer = ",\n".join(f"${component}" for component in components)
            al.append(f"return ({vtype})({initializer});")
class OpenClComplexModulus2(OpenClComplexUnaryOperator):
    """Generator for ``cmodulus2``: squared modulus of packed complex values.

    Components ``2*i`` and ``2*i + 1`` of the operand are combined as
    ``x*x + y*y``, giving one output component per complex number.
    """

    basename = "cmodulus2"

    def determine_output_ctype(
        self, ftype, vtype, rtype, vectorization, typegen, args, reqs
    ):
        """Return ``rtype``: the result has one component per complex value."""
        return rtype

    def gencode(self):
        """Emit a single ``return`` statement building the squared-modulus vector."""
        s, _tg, vec, _ftype, _vtype, rtype, a = self.get_attrs()
        with s._function_(), s._align_() as al:
            squares = []
            for k in range(vec):
                re_idx = 2 * k
                im_idx = re_idx + 1
                squares.append(f"{a[re_idx]}*{a[re_idx]}+{a[im_idx]}*{a[im_idx]}")
            # Each component sits on its own line, prefixed with "$"
            # (presumably the anchor consumed by the _align_ context - confirm).
            initializer = ",\n".join(f"${square}" for square in squares)
            al.append(f"return ({rtype})({initializer});")
class OpenClComplexModulus(OpenClComplexUnaryOperator):
    """Generator for ``cmodulus``: ``sqrt`` applied to the ``cmodulus2`` result."""

    basename = "cmodulus"

    def generate_requirements(self, ftype, vtype, rtype, vectorization, typegen, args):
        """Return the inherited requirements plus a matching ``cmodulus2`` generator.

        The ``OpenClComplexModulus2`` instance is built with the same
        ``typegen``, ``ftype`` and ``vectorization`` as this operator.
        """
        requirements = super().generate_requirements(
            ftype, vtype, rtype, vectorization, typegen, args
        )
        squared_modulus = OpenClComplexModulus2(
            typegen=typegen, ftype=ftype, vectorization=vectorization
        )
        requirements["cmodulus2"] = squared_modulus
        return requirements

    def determine_output_ctype(
        self, ftype, vtype, rtype, vectorization, typegen, args, reqs
    ):
        """Return ``rtype``: the result has one component per complex value."""
        return rtype

    def gencode(self):
        """Emit ``return sqrt(<cmodulus2 call on a>);`` as the function body."""
        s, *_unused, a = self.get_attrs()
        with s._function_():
            # Calling the requirement is formatted straight into the source text
            # (presumably it yields the call expression - confirm).
            squared_call = s.reqs["cmodulus2"](a=a)
            s.append(f"return sqrt({squared_call});")
if __name__ == "__main__":
    # Manual smoke test: instantiate a few operators, register them on a
    # code generator and run its edit / test_compile steps.
    from hysop.backend.device.codegen.base.test import _test_typegen

    tg = _test_typegen("double")
    cg = OpenClCodeGenerator("main", tg)

    # (generator class, scalar type, vectorization), built in this exact order.
    cases = (
        (OpenClComplexMul, "double", 1),
        (OpenClComplexMul, "float", 8),
        (OpenClComplexModulus2, "double", 1),
        (OpenClComplexModulus2, "float", 4),
        (OpenClComplexModulus, "float", 2),
    )
    generators = [
        generator_cls(tg, scalar_type, width)
        for generator_cls, scalar_type, width in cases
    ]

    for generator in generators:
        cg.require(generator.name, generator)

    cg.edit()
    cg.test_compile()